##########################################
# Replication Data for Proksch, Lowe, Wäckerle, Soroka. (2018). Multilingual Sentiment Analysis: A New Approach to Measuring Conflict in Legislative Speeches. Legislative Studies Quarterly, Forthcoming.
##########################################

#Part 3: Wordscores Replication - Sentiment

# ---- Session setup ---------------------------------------------------------
# Start from an empty workspace (including hidden objects), move to the
# directory of this script, load the objects written by the previous step and
# make sure quanteda 1.1.1 is the version in use.
rm(list = ls(all.names = TRUE))
library(rstudioapi)

# The script locates itself through the RStudio API. Outside RStudio (or for
# an unsaved document, whose path is "") the working directory is left
# unchanged, so the .RData files must then be in the current directory.
if (rstudioapi::isAvailable()) {
  current_path <- getActiveDocumentContext()$path
  if (nzchar(current_path)) {
    setwd(dirname(current_path))
  }
}
load("3_positions_before_senti.RData")

# library() stops with an error when a package is missing; require() would
# only return FALSE and let the script fail later with a confusing message.
library(devtools)

# Pin quanteda to 1.1.1. Only detach / remove / reinstall when the installed
# version differs, and only detach or remove what is actually there, so the
# script neither fails in a fresh session nor reinstalls on every run.
quanteda_version <- "1.1.1"
quanteda_installed <- nzchar(system.file(package = "quanteda"))
if (!quanteda_installed ||
    packageVersion("quanteda") != quanteda_version) {
  if ("package:quanteda" %in% search()) {
    detach("package:quanteda", unload = TRUE)
  } else if (isNamespaceLoaded("quanteda")) {
    unloadNamespace("quanteda")
  }
  if (quanteda_installed) {
    remove.packages("quanteda")
  }
  # Fetch over HTTPS rather than plain HTTP.
  install_version("quanteda", version = quanteda_version,
                  repos = "https://cran.us.r-project.org")
}
library(quanteda)
library(tidyverse)
library(reshape2)


# Run Sentiment and save in the same way as Wordscores
#
# For every budget year 1983-2013 the speeches in budgetCorpus are grouped by
# memberID (documents flagged leas_cheann_comhairle or cheann_comhairle are
# dropped) and scored with the dictionary data_dictionary_LSD2015:
#   Sentiment = log((positive + 0.5) / (negative + 0.5))
# Only speakers whose grouped document has more than minWordsThreshold
# features in total (row sum of the plain, non-dictionary dfm) are kept.
#
# Result: sentiResults, a data frame with one row per budget_year / memberID.
# senti and senti.data are left holding the values of the last year processed,
# because they are written to the output file further down.

# Constant across years, so set once instead of on every iteration.
minWordsThreshold <- 200

sentiYears <- 1983:2013
# One result slot per year; filled in the loop and bound together once at the
# end instead of growing a data frame with rbind() on every iteration.
sentiByYear <- vector("list", length(sentiYears))

cat("Computing Sentiment: \n")
for (i in seq_along(sentiYears)) {
  yr <- sentiYears[i]
  cat("  ...", yr, "\n")

  # Subset the corpus once per year and reuse it for both dfm() calls.
  yearCorpus <- corpus_subset(budgetCorpus, budget_year == yr &
                                !leas_cheann_comhairle &
                                !cheann_comhairle)

  # Total feature count per speaker, used for the minimum-length filter.
  wordCounts <- rowSums(dfm(yearCorpus, groups = "memberID", verbose = FALSE))

  # Dictionary counts per speaker; both dfms are grouped the same way on the
  # same subset, so wordCounts lines up with the rows of senti.
  senti <- dfm(yearCorpus, groups = "memberID", verbose = FALSE,
               dictionary = data_dictionary_LSD2015)
  senti <- senti[wordCounts > minWordsThreshold, ]

  senti.data <- as.data.frame(senti)
  senti.data$Sentiment <- log((senti.data$positive + 0.5) /
                                (senti.data$negative + 0.5))
  # rep() keeps the assignment valid even if no speaker passes the filter.
  senti.data$budget_year <- rep(yr, nrow(senti.data))
  # Take the IDs from docnames(senti), exactly as sentiResults does, rather
  # than relying on row names surviving the conversion to a data frame.
  senti.data$memberID <- as.numeric(docnames(senti))

  sentiByYear[[i]] <- data.frame(budget_year = senti.data$budget_year,
                                 memberID = senti.data$memberID,
                                 Sentiment = senti.data$Sentiment)
}
sentiResults <- do.call(rbind, sentiByYear)
cat("finished.\n")
# Keep an untouched copy of the results as loaded, then add the sentiment
# scores to them. No `by` is supplied, so left_join() matches on every column
# name the two tables have in common; all rows of textResults are kept and
# rows of sentiResults without a match are dropped.
textResults.store <- textResults
textResults <- left_join(x = textResults, y = sentiResults)
#There are more Sentiment positions than Wordscores positions because we don't use the minimum word threshold
# NOTE(review): the sentiment loop above does filter on minWordsThreshold
# (200); presumably the remark refers to a different threshold applied in the
# Wordscores step -- confirm against the preceding script.

# Write the objects needed by the next step to disk. The objects are named as
# strings through `list =`; they are looked up in the current environment and
# stored in the order given.
save(
  list = c(
    "budgetCorpus", "d", "dupls", "duplslist", "fm.data", "multidept",
    "os.data", "senti", "senti.data", "sentiResults", "textResults",
    "textResults.store", "thisdfm", "ts", "ws", "yearspeakerData",
    "inMoreThanOneDept", "minWordsThreshold", "refscores", "thisrefGovt",
    "thisrefOpp", "varstokeep"
  ),
  file = "3_positions_after_senti.RData"
)

